{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "Tuning an Ada Boost Regressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Load this line within an IPython notebook to visualize within the notebook\n",
    "%matplotlib inline \n",
    "\n",
    "from __future__ import division #Load within Python 2.7 for regular division\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from sklearn.datasets import fetch_california_housing\n",
    "\n",
    "cali_housing = fetch_california_housing()\n",
    "\n",
    "X = cali_housing.data\n",
    "y = cali_housing.target\n",
    "\n",
    "#bin output variable to split training and testing sets into two similar sets\n",
    "bins = np.arange(6)\n",
    "binned_y = np.digitize(y, bins)\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,stratify=binned_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RandomizedSearchCV(cv=3, error_score='raise',\n",
       "          estimator=AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',\n",
       "         n_estimators=50, random_state=None),\n",
       "          fit_params=None, iid=True, n_iter=10, n_jobs=-1,\n",
       "          param_distributions={'n_estimators': [50, 100], 'loss': ['linear', 'square', 'exponential'], 'learning_rate': [0.01, 0.05, 0.1, 0.3, 1]},\n",
       "          pre_dispatch='2*n_jobs', random_state=None, refit=True,\n",
       "          return_train_score=True, scoring=None, verbose=0)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.ensemble import AdaBoostRegressor\n",
    "from sklearn.model_selection import RandomizedSearchCV\n",
    "\n",
    "param_dist = {\n",
    " 'n_estimators': [50, 100],\n",
    " 'learning_rate' : [0.01,0.05,0.1,0.3,1],\n",
    " 'loss' : ['linear', 'square', 'exponential']\n",
    " }\n",
    "\n",
    "pre_gs_inst = RandomizedSearchCV(AdaBoostRegressor(),param_distributions = param_dist,\n",
    " cv=3,\n",
    " n_iter = 10,\n",
    " n_jobs=-1)\n",
    "\n",
    "pre_gs_inst.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'learning_rate': 0.05, 'loss': 'linear', 'n_estimators': 100}"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pre_gs_inst.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AdaBoostRegressor(base_estimator=None, learning_rate=0.05, loss='linear',\n",
       "         n_estimators=3000, random_state=None)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "param_dist = {\n",
    " 'n_estimators': [100],\n",
    " 'learning_rate' : [0.04,0.045,0.05,0.055,0.06],\n",
    " 'loss' : ['linear']\n",
    " }\n",
    "\n",
    "import copy\n",
    "ada_best = copy.deepcopy(pre_gs_inst.best_params_)\n",
    "ada_best['n_estimators'] = 3000\n",
    "\n",
    "rs_ada = AdaBoostRegressor(**ada_best)\n",
    "rs_ada.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "R-squared 0.539456765683\n",
      "MAE :  0.652927521894\n",
      "MAPE :  0.479136103862\n"
     ]
    }
   ],
   "source": [
    "y_pred = rs_ada.predict(X_test)\n",
    "\n",
    "from sklearn.metrics import r2_score, mean_absolute_error\n",
    "\n",
    "print \"R-squared\",r2_score(y_test, y_pred)\n",
    "print \"MAE : \",mean_absolute_error(y_test, y_pred)\n",
    "print \"MAPE : \",(np.abs(y_test - y_pred)/y_test).mean()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
